#Replication and Extension of Franck 2009
#May 2, 2013

#Table of Contents
#1. Importing and defining population of interest.
#2. Cleaning and correcting the data.
#3. Winsorizing of data.
#4. Making Table 1 from Franck 2009, and presenting alternative country classification.
#5. Making Table 3 and alternative. 
#6. Replicating chi square models.
#7. Eliminating arbitrator nationality as a factor.
#8. Make Table 5 of Franck paper, and run associated model.
#9. De-winsorizing the data.
#10. Raw data with modified country classification.
#11. Running alternative model using other data.

############################

# Project directory used by the original author. Switch to it only when it
# exists, so the script does not abort on other machines; in that case the
# HTML tables are written to the current working directory instead.
project_dir <- "C:\\Users\\Todd Tucker\\Dropbox\\BIT stats\\Franck replication"
if (dir.exists(project_dir)) {
  setwd(project_dir)
}

#1. Importing and defining population of interest.

# memisc is loaded for the remainder of the script; the .sav file itself is
# read by foreign::read.spss() below. Install only when the package is
# missing instead of re-downloading it on every run.
if (!requireNamespace("memisc", quietly = TRUE)) {
  install.packages("memisc")
}
library(memisc)

# Import the file. Value labels are converted to factor levels and the result
# is returned as a data frame.
library(foreign)
franck <- read.spss(
  "http://law.wlu.edu/faculty/facultydocuments/francks/dataset.sav",
  use.value.labels = TRUE,
  to.data.frame = TRUE
)
# NOTE: the original attach(franck) call was removed. Every later step reads
# its variables from `final` or from vectors created explicitly, so attaching
# only put the unfiltered 102-row columns on the search path.

# This is the 49 subset of the 102 observations that is the subject of
# Franck 2009 - those that finally resolved the investor-state disputes.
final <- subset(franck, AwardFinality == "Final Award for All Issues")



#2 cleaning and correcting the data.

# A series of extractions facilitates data analysis and replacement. Each
# variable is pulled out of `final` as a plain vector, corrected, and then
# bound back onto `final` at the end of this section.

# Arbitrator nationality variable (explanatory variable), converted to a
# numeric code with as.numeric().
Arb_OECD <- as.numeric(final$ChairOECD)

# The author miscoded several variables in her publicly available dataset,
# which she confirmed to me in an email on 3/8/13.
# According to her 2009 paper (note 111), arbitrator info was unavailable in
# Nykomb Synergistics Technology Holding AB v. Latvia, CCL v. Kazakhstan, and
# Nagel v. Czech Republic.
# However, to get results in her tables, she seems to have assumed arbitrator
# nationality in Nagel and Nykomb, which was borne out by other sources:
# http://www.iiapp.org/arbitrator/bjorn-haug
# http://www.iiapp.org/arbitrator/hans-danelius/
# So I make a few substitutions.

# Every missing arbitrator code is set to 2 (the value the later sections
# treat as "OECD arbitrator").
Arb_OECD[is.na(Arb_OECD)] <- 2
# Rows are addressed by position within `final`.
final[2, "ChairNatNu"] <- "Sweden"
# "Norway" must be added as a factor level before it can be assigned.
levels(final$ChairNatNu) <- c(levels(final$ChairNatNu), "Norway")
final[48, "ChairNatNu"] <- "Norway"

# Country nationality variable (explanatory variable).
Country_OECD <- as.numeric(final$oecd_G)

# Outcome of case, i.e. win or loss (outcome variable).
Respwin <- final$Rwins
# Recode Respwin to numeric for easier manoeuvrability, since it has long
# strings which crash R.
Ultimatewinner <- as.numeric(Respwin)

# Dollar amount awarded in case (outcome variable).
Award_Amount <- final$CommonCurrencyUSDofReceivedDamages

# The data for Award_Amount include two NAs that FN 272 of her 2007 paper
# actually indicates she gave values to. They are replaced in the vector
# *before* binding, so the free-standing Award_Amount vector and the
# final$Award_Amount column carry the same corrected values (previously only
# the column in `final` was corrected).
Award_Amount[9] <- 2989636
Award_Amount[39] <- 407646

# Bind the new columns onto final.
final <- cbind(final, Arb_OECD, Country_OECD, Ultimatewinner, Award_Amount)


#3 Winsorizing of data.

# Her paper uses winsorized data (to reduce skewness),
# but the publicly available data does not include this procedure.
# The process is described in Franck's 2011 paper, notes 153 to 155.
# She says she reassigned the seven highest values to $5,675,537.50.
# She does not appear to have reduced lower bound values.
# Not clear how this exact figure was identified, since this value does not
# show up in her data, and doesn't seem to be the 75% quantile using
# quantile(Award_Amount,na.rm=T)

# Upper cap applied to the award amounts.
winsor_cap <- 5675537.50

# Every value greater than or equal to the cap becomes the cap; all other
# values (including NA) are left unchanged. Base pmin() is used instead of
# recode(): car, which this script loads later, also exports a recode() with a
# different call syntax, so the recode() form stops working when the script is
# re-run in the same session.
winsor <- pmin(final$Award_Amount, winsor_cap)

# Bind to final.
final <- cbind(final, winsor)



#4. Making Table 1 from Franck 2009, and presenting alternative country classification.

# Table 1: arbitrator code (rows) by respondent code (columns), with marginal
# totals added.
table1_counts <- table(final$Arb_OECD, final$Country_OECD)
Table1 <- addmargins(table1_counts)
rownames(Table1) <- c("Non-OECD Arbitrator", "OECD Arbitrator", "Total")
colnames(Table1) <- c("Non-OECD Respondent", "OECD Respondent", "Total")

# Write the table to Table1.html.
library(xtable)
table1_html <- xtable(Table1,
                      caption = "Table 1: Participants in IIA Arbitration",
                      digits = 0)
print(table1_html, type = "html", file = "Table1.html", digits = 0)
# This reproduces correctly.

#Recoding with alternate country designation.

# At this point, I begin my personal elaboration of her findings, by
# introducing a new recoded variable.
# From here (http://www.imf.org/external/pubs/ft/weo/2013/01/pdf/statapp.pdf),
# I extract the IMF advanced economies, and recode.
# However, I remove the Eastern Bloc and "nonWest" countries
# (Slovak Republic,Estonia,Slovenia,Singapore, Czech Republic,Korea, Taiwan)
if (!requireNamespace("datamart", quietly = TRUE)) {
  install.packages("datamart")
}
library(datamart)

# Country names that count as "advanced". The pattern is assembled from this
# vector rather than typed as one string literal broken across source lines:
# the line breaks and indentation inside the old literal became part of the
# "Luxembourg$" and "Israel$" alternatives, so those two could never match.
advanced_names <- c(
  "Austria", "Germany", "Netherlands", "Belgium", "Greece", "Portugal",
  "Cyprus", "Ireland", "Italy", "Finland", "Luxembourg", "Spain", "France",
  "Malta", "Canada", "United States", "Japan", "United Kingdom", "Australia",
  "Israel", "Sweden", "Denmark", "New Zealand", "Switzerland", "Hong Kong",
  "Norway", "Iceland"
)
# Each name is anchored at the end of the string; "San Marino" is left
# unanchored, as it was in the original pattern.
advanced_pattern <- paste(c(paste0(advanced_names, "$"), "San Marino"),
                          collapse = "|")

# Respondent state: 1 when the government name matches the pattern, else 0
# (non-matches come back as NA and are set to 0).
Govt <- final$GovernmentN
Advanced <- strrecode(c(advanced_pattern), c(1), Govt)
Advanced <- as.numeric(Advanced)
Advanced[is.na(Advanced)] <- 0
Advanced

# Presiding arbitrator: same recode applied to the chair's nationality.
Govta <- final$ChairNatNu
Advancedarb <- strrecode(c(advanced_pattern), c(1), Govta)
Advancedarb <- as.numeric(Advancedarb)
Advancedarb[is.na(Advancedarb)] <- 0
Advancedarb

# Bind these as new columns onto final.
final <- cbind(final, Advanced, Advancedarb)

# Make Table 2, as alternative to Table 1.
Table2 <- addmargins(table(final$Advancedarb, final$Advanced))
colnames(Table2) <- c("Non-Western Respondent", "Western Respondent", "Total")
rownames(Table2) <- c("Non-Western Arbitrator", "Western Arbitrator", "Total")
print.xtable((xtable(Table2, caption = "Table 2: Participants in IIA Arbitration", digits=0)),
             type="html", file="Table2.html",digits=0)


#5. Making Table 3 and alternative.

# Both tables cross arbitrator/respondent combinations (rows) with the case
# outcome (columns) and share the same column labels.
outcome_labels <- c("Claimant Win", "Respondent Win", "Total")

# Table 3 from Franck's paper: OECD classification.
Table3 <- addmargins(
  ftable(xtabs(~Arb_OECD+Country_OECD+Ultimatewinner, data = final))
)
colnames(Table3) <- outcome_labels
rownames(Table3) <- c("Non-OECD Arb/Resp",
                      "Non-OECD Arb/OECD Resp",
                      "OECD Arb/Non-OECD Resp",
                      "OECD Arb/Resp",
                      "Total")
table3_html <- xtable(
  Table3,
  caption = "Table 3: Participants and Outcomes in IIA Arbitration",
  digits = 0
)
print(table3_html, type = "html", file = "Table3.html", digits = 0)

# Table 4, as alternative to Table 3: recoded (advanced/Western) classification.
Table4 <- addmargins(
  ftable(xtabs(~Advancedarb+Advanced+Ultimatewinner, data = final))
)
colnames(Table4) <- outcome_labels
rownames(Table4) <- c("Non-Western Arb/Resp",
                      "Non-Western Arb/Western Resp",
                      "Western Arb/Non-Western Resp",
                      "Western Arb/Resp",
                      "Total")
table4_html <- xtable(
  Table4,
  caption = "Table 4: Participants and Outcomes in IIA Arbitration",
  digits = 0
)
print(table4_html, type = "html", file = "Table4.html", digits = 0)



#6. Replicating chi square models.

# Install the helper packages only when they are missing.
for (pkg in c("gmodels", "vcd", "pwr")) {
  if (!requireNamespace(pkg, quietly = TRUE)) {
    install.packages(pkg)
  }
}
library(gmodels)
library(car)
library(vcd)
library(pwr)

# Logical masks: TRUE where the arbitrator / respondent code equals 2, the
# value labelled "OECD" in Table 1.
conditionx <- Arb_OECD == 2
conditiony <- Country_OECD == 2

# Calculate chi-square for non-OECD arbitrators.
Model1 <- CrossTable(Country_OECD[!conditionx], Ultimatewinner[!conditionx],
                     expected = TRUE, chisq = TRUE, prop.t = TRUE,
                     prop.c = FALSE, prop.r = FALSE, prop.chisq = FALSE,
                     fisher = TRUE, format = "SPSS")

# For non-OECD arbitrators, no pattern relationship (x2(1) =.258; p = .61; n = 13).

# Calculate effect size.
phiModel1 <- table(Country_OECD[!conditionx], Ultimatewinner[!conditionx])
assocstats(phiModel1)

# This produces an effect size of .141 (the unrounded value used in the power
# calculation further down), or potentially trivial under Cohen's conventions.

# The problem with the foregoing is that several of her expected cell counts
# are under 5. She should have used Fisher's exact test.

# Calculating a priori power.
pwr.chisq.test(w = 0.15, N = NULL, df = 1, sig.level = 0.05, power = 0.80)

# Because she is trying to extend from a 2 by 2 to a 4 by 2 matrix, she claims
# to need to make an adjustment.
(349/2)*4
# This produces N=698.

# If I use the actual effect size rather than rounding,
pwr.chisq.test(w = 0.141, N = NULL, df = 1, sig.level = 0.05, power = 0.80)
# This gives a 395 for N.
(395/2)*4
# This gives N=790.

# For OECD arbitrators.
Model1a <- CrossTable(Country_OECD[conditionx], Ultimatewinner[conditionx],
                      expected = TRUE, chisq = TRUE, prop.t = TRUE,
                      prop.c = FALSE, prop.r = FALSE, prop.chisq = FALSE,
                      fisher = TRUE, format = "SPSS")
phiModel1a <- table(Country_OECD[conditionx], Ultimatewinner[conditionx])
assocstats(phiModel1a)
pwr.chisq.test(w = 0.09, N = NULL, df = 1, sig.level = 0.05, power = 0.80)
(969/2)*4

# Model 1b, alternative to her model 1 with recoded countries.
conditiona <- Advancedarb == 1
conditionb <- Advanced == 1

# Non-advanced arbitrators.
Model1b <- CrossTable(Advanced[!conditiona], Ultimatewinner[!conditiona],
                      expected = TRUE, chisq = TRUE, prop.t = TRUE,
                      prop.c = FALSE, prop.r = FALSE, prop.chisq = FALSE,
                      fisher = TRUE, format = "SPSS")

phiModel1b <- table(Advanced[!conditiona], Ultimatewinner[!conditiona])
# Effect size for Model 1b. This previously re-reported phiModel1a by mistake.
assocstats(phiModel1b)
# NOTE(review): w = 0.09 and the 969 below are identical to the Model 1a
# figures and were entered while the wrong table was being inspected; re-read
# the effect size printed above and update both numbers if they differ.
pwr.chisq.test(w = 0.09, N = NULL, df = 1, sig.level = 0.05, power = 0.80)
(969/2)*4

# Advanced arbitrators.
Model1c <- CrossTable(Advanced[conditiona], Ultimatewinner[conditiona],
                      expected = TRUE, chisq = TRUE, prop.t = TRUE,
                      prop.c = FALSE, prop.r = FALSE, prop.chisq = FALSE,
                      fisher = TRUE, format = "SPSS")

phiModel1c <- table(Advanced[conditiona], Ultimatewinner[conditiona])
assocstats(phiModel1c)
pwr.chisq.test(w = 0.015, N = NULL, df = 1, sig.level = 0.05, power = 0.80)
(34883/2)*4


#7. Eliminating arbitrator nationality as a factor.

# Table 4a: respondent classification by outcome, arbitrator nationality
# dropped, with marginal totals.
Table4a <- addmargins(ftable(xtabs(~Advanced+Ultimatewinner, data = final)))
rownames(Table4a) <- c("Non-Advanced Respondent", "Advanced Respondent", "Total")
colnames(Table4a) <- c("Claimant Win", "Respondent Win", "Total")
table4a_html <- xtable(
  Table4a,
  caption = "Table 4a: States and Outcomes in IIA Arbitration",
  digits = 0
)
print(table4a_html, type = "html", file = "Table4a.html", digits = 0)

# Plain 2x2 table used for the association statistics below.
phiModel1d <- table(Advanced, Ultimatewinner)

# Chi-square / Fisher test of respondent classification against outcome.
Model1d <- CrossTable(
  Advanced, Ultimatewinner,
  expected = TRUE,
  chisq = TRUE,
  prop.t = TRUE,
  prop.c = FALSE,
  prop.r = FALSE,
  prop.chisq = FALSE,
  fisher = TRUE,
  format = "SPSS"
)

# Effect size, then the sample size needed for 80% power at the hard-coded
# effect size, scaled with the same (N/2)*4 adjustment used in section 6.
assocstats(phiModel1d)
pwr.chisq.test(w = 0.015, N = NULL, df = 1, sig.level = 0.05, power = 0.80)
(34883/2)*4





#8. Make Table 5 of Franck paper, and run associated model.

# The four arbitrator x respondent cells, in the row order of Table 5.
# conditionx / conditiony are TRUE for OECD arbitrators / OECD respondents.
table5_cells <- list(
  !conditionx & !conditiony,
  !conditionx & conditiony,
  conditionx & !conditiony,
  conditionx & conditiony
)

# One row per cell: mean and standard deviation of the winsorized award, and
# the number of cases in the cell.
table5_stats <- t(vapply(
  table5_cells,
  function(in_cell) {
    amounts <- final$winsor[in_cell]
    c(mean(amounts), sd(amounts), sum(in_cell))
  },
  numeric(3)
))

# The total row only reports the case count; its mean/SD placeholders are
# blanked out before printing.
Table5 <- rbind(table5_stats, c(0, 0, nrow(final)))
colnames(Table5) <- c("Mean", "Standard Deviation", "Total")
rownames(Table5) <- c("Non-OECD Arb/Resp", "Non-OECD Arb/OECD Resp",
                      "OECD Arb/Non-OECD Resp", "OECD Arb/Resp", "Total")

# Printable copy: dollar-prefixed, rounded amounts. Assigning strings turns
# the whole matrix into character.
Table5.toprint <- Table5
Table5.toprint[, 1:2] <- paste0("$", round(Table5.toprint[, 1:2]))
Table5.toprint[5, 1:2] <- ""
print.xtable(xtable(prettyNum(Table5.toprint,digits=0,big.mark=","),caption = "Table 5: Participants and Damages Awarded in IIA Arbitration (Winsorized)"),
             type="html", file="Table5.html",digits=0)

#Model 2, looking at dollar amount rewarded.

#Using guidance from Field book, p. 521, 543, and http://www.sagepub.com/dsur/study/DSUR%20R%20Script%20Files/Chapter%2012%20DSUR%20GLM3.R
library(car)

# Type III sums of squares are only meaningful for the main effects when the
# predictors are factors with sum-to-zero contrasts. With the raw numeric 1/2
# codes each main effect is tested at the value 0 of the other predictor,
# which lies outside the data. The factors are created on a copy so `final`
# itself is left untouched.
model2_data <- final
model2_data$Arb_OECD <- factor(model2_data$Arb_OECD)
model2_data$Country_OECD <- factor(model2_data$Country_OECD)

Model2 <- aov(winsor ~ Arb_OECD * Country_OECD,
              data = model2_data,
              contrasts = list(Arb_OECD = "contr.sum",
                               Country_OECD = "contr.sum"))

# Keep the ANOVA table so its cells can be indexed below. Row order is
# intercept, the two main effects, the interaction, then residuals.
anova.testout <- Anova(Model2, type = "III")

# Omega-squared effect sizes for a two-way factorial design.
#   n                   sample-size term entering the formulas below
#   a, b                number of levels of factor A and factor B
#   SSa, SSb, SSab, SSr sums of squares for A, B, A:B and the residual
# Prints the three omega-squared values; the value of the function is the
# last printed string (the A:B line).
omega_factorial<-function(n,a,b, SSa, SSb, SSab, SSr)
{
  df_a <- a - 1
  df_b <- b - 1
  n_total <- n * a * b

  # Mean squares.
  ms_resid <- SSr / (a * b * (n - 1))
  ms_a <- SSa / df_a
  ms_b <- SSb / df_b
  ms_ab <- SSab / (df_a * df_b)

  # Variance components.
  var_a <- (df_a * (ms_a - ms_resid)) / n_total
  var_b <- (df_b * (ms_b - ms_resid)) / n_total
  var_ab <- (df_a * df_b * (ms_ab - ms_resid)) / n_total
  var_total <- var_a + var_b + var_ab + ms_resid

  omega_a <- var_a / var_total
  omega_b <- var_b / var_total
  omega_ab <- var_ab / var_total

  print(paste("Omega-Squared A: ", omega_a))
  print(paste("Omega-Squared B: ", omega_b))
  print(paste("Omega-Squared AB: ", omega_ab))
}
# omega_factorial() uses n as the per-cell sample size: a*b*(n-1) must equal
# the residual degrees of freedom and n*a*b the total sample size. The cells
# here are unequal, so the average cell size implied by the residual df of the
# fitted model is used (the total of 49 was passed before, which inflated
# both terms).
model2_cells <- 2 * 2
model2_resid_df <- anova.testout[5, 2]
model2_n_per_cell <- model2_resid_df / model2_cells + 1

# Rows 2-5 of the ANOVA table are A, B, A:B and residuals; column 1 holds the
# sums of squares.
Model5effectw <- omega_factorial(model2_n_per_cell, 2, 2,
                                 anova.testout[2, 1],
                                 anova.testout[3, 1],
                                 anova.testout[4, 1],
                                 anova.testout[5, 1])

#Requisite power analysis for interaction.
# pwr.anova.test() expects Cohen's f, not the F statistic stored in column 3
# of the ANOVA table. For the interaction, f = sqrt(SS_effect / SS_residual),
# which is sqrt(eta_p^2 / (1 - eta_p^2)) for partial eta squared.
model2_cohen_f <- sqrt(anova.testout[4, 1] / anova.testout[5, 1])
pwr.anova.test(f = model2_cohen_f, k = 4, power = 0.80, sig.level = 0.05)
# The earlier figure of 4127 came from passing the F statistic as f; re-read
# the required n from the output above.


#9. De-winsorizing the data.
#Make Non-winsorized table.

# The four arbitrator x respondent cells, in the row order of Table 5a.
# conditionx / conditiony are TRUE for OECD arbitrators / OECD respondents.
table5a_cells <- list(
  !conditionx & !conditiony,
  !conditionx & conditiony,
  conditionx & !conditiony,
  conditionx & conditiony
)

# One row per cell: mean and standard deviation of the raw award amount, and
# the number of cases in the cell.
table5a_stats <- t(vapply(
  table5a_cells,
  function(in_cell) {
    amounts <- final$Award_Amount[in_cell]
    c(mean(amounts), sd(amounts), sum(in_cell))
  },
  numeric(3)
))

# The total row only reports the case count; its mean/SD placeholders are
# blanked out before printing.
Table5a <- rbind(table5a_stats, c(0, 0, nrow(final)))
colnames(Table5a) <- c("Mean", "Standard Deviation", "Total")
rownames(Table5a) <- c("Non-OECD Arb/Resp", "Non-OECD Arb/OECD Resp",
                       "OECD Arb/Non-OECD Resp", "OECD Arb/Resp", "Total")

# Printable copy: dollar-prefixed, rounded amounts (matrix becomes character).
Table5a.toprint <- Table5a
Table5a.toprint[, 1:2] <- paste0("$", round(Table5a.toprint[, 1:2]))
Table5a.toprint[5, 1:2] <- ""
print.xtable(xtable(prettyNum(Table5a.toprint,digits=0,big.mark=","),caption = "Table 5a: Participants and Damages Awarded in IIA Arbitration (Raw)"),
             type="html", file="Table5a.html",digits=0)

#10. Raw data with modified country classification.

# Mean, standard deviation and case count of the raw award amount for the
# cases selected by a logical mask.
table6_cell_stats <- function(in_cell) {
  amounts <- final$Award_Amount[in_cell]
  c(mean(amounts), sd(amounts), sum(in_cell))
}

# Table 6: arbitrator x respondent cells under the recoded classification.
# conditiona / conditionb are TRUE for advanced arbitrators / respondents.
Table6 <- rbind(
  table6_cell_stats(!conditiona & !conditionb),
  table6_cell_stats(!conditiona & conditionb),
  table6_cell_stats(conditiona & !conditionb),
  table6_cell_stats(conditiona & conditionb),
  c(0, 0, nrow(final))  # total row: only the count is reported
)
colnames(Table6) <- c("Mean", "Standard Deviation", "Total")
rownames(Table6) <- c("Non-Western Arb/Resp", "Non-Western Arb/Advanced Resp",
                      "Western Arb/Non-Western Resp", "Western Arb/Resp", "Total")
# Printable copy: dollar-prefixed, rounded amounts (matrix becomes character).
Table6.toprint <- Table6
Table6.toprint[, 1:2] <- paste0("$", round(Table6.toprint[, 1:2]))
Table6.toprint[5, 1:2] <- ""
print.xtable(xtable(prettyNum(Table6.toprint,digits=0,big.mark=","),caption = "Table 6: Participants and Damages Awarded in IIA Arbitration (Raw)"),
             type="html", file="Table6.html",digits=0)

#Table 6a, with non-winsorized modified country names, no arbitrator info.
Table6a <- rbind(
  table6_cell_stats(!conditionb),
  table6_cell_stats(conditionb),
  c(NA, NA, nrow(final))  # total row: only the count is reported
)
colnames(Table6a) <- c("Mean", "Standard Deviation", "Total")
rownames(Table6a) <- c("Non-Western Respondent", "Western Respondent", "Total")
Table6a.toprint <- Table6a
Table6a.toprint[1:2, 1:2] <- paste0("$", round(Table6a.toprint[1:2, 1:2]))
Table6a.toprint[3, 1:2] <- ""
print.xtable(xtable(prettyNum(Table6a.toprint,digits=0,big.mark=","),caption = "Table 6a: States and Damages Awarded in IIA Arbitration (Raw)"),
             type="html", file="Table6a.html",digits=0)


#Table 6b, with non-winsorized modified country names, no arbitrator info, aggregate
# Total awarded and number of cases per respondent group.
Table6b <- rbind(
  c(sum(final$Award_Amount[!conditionb]), sum(!conditionb)),
  c(sum(final$Award_Amount[conditionb]), sum(conditionb)),
  c(NA, nrow(final))  # total row: only the count is reported
)
colnames(Table6b) <- c("Sum", "Number of Cases")
rownames(Table6b) <- c("Non-Advanced Respondent", "Advanced Respondent", "Total")
Table6b.toprint <- Table6b
Table6b.toprint[1:2, 1] <- paste0("$", round(Table6b.toprint[1:2, 1]))
Table6b.toprint[3, 1] <- ""
print.xtable(xtable(prettyNum(Table6b.toprint,digits=0,big.mark=","),caption = "Table 6b: States and Damages Awarded in IIA Arbitration (Raw)"),
             type="html", file="Table6b.html",digits=0)


#11. Running alternative model using other data.

# Both variables are read from `final`, so the tests always use the corrected
# award amounts stored there. `paired` is not passed: unpaired is the default,
# and newer R releases (4.4+) raise an error when `paired` is supplied to the
# formula interface.
Model2a <- t.test(Award_Amount ~ Advanced, data = final)

# Effect size r from the t statistic and its degrees of freedom. The values
# are stored under names that do not shadow base::t() and stats::df().
t_stat <- Model2a$statistic[[1]]
t_df <- Model2a$parameter[[1]]
r <- sqrt(t_stat^2 / (t_stat^2 + t_df))

#Running Mann WHitney: http://yatani.jp/HCIstats/MannWhitney#EffectSize
Model2c <- wilcox.test(Award_Amount ~ Advanced, data = final)

if (!requireNamespace("coin", quietly = TRUE)) {
  install.packages("coin")
}
library(coin)

# Exact Mann-Whitney test of award amount between the two respondent groups.
# The grouping factor must be the Advanced indicator; the earlier version
# stacked the award amounts and the 0/1 indicator values as if they were the
# two samples being compared.
mw_data <- na.omit(data.frame(
  award = final$Award_Amount,
  group = factor(final$Advanced)
))
Model2d <- wilcox_test(award ~ group, data = mw_data, distribution = "exact")
Model2d

# Effect size r = Z / sqrt(N), computed from the fitted test instead of a
# hard-coded Z value.
mw_z <- as.numeric(statistic(Model2d, type = "standardized"))
mw_z / sqrt(nrow(mw_data))